{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from keras.layers.core import regularizers\n",
    "from keras.layers import Input,Dense,LSTM\n",
    "from keras.models import Model\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取数据\n",
    "df = pd.read_csv('data.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 容量剩余\n",
    "df['cpu_usable'] = df['cpu_total'] - df['cpu_used']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 转换时间格式\n",
    "df['record_time'] = df['record_time'].apply(lambda x: x.replace('/','-'))\n",
    "df['record_time'] = df['record_time'].apply(lambda x: datetime.datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 原始数据\n",
    "f = plt.figure(figsize=(20,5))\n",
    "plt.plot(df['record_time'],df['cpu_usable'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 上图可以看出：数据存在缺失\n",
    "# 找出数据缺失点\n",
    "df['delta']=df['record_time']-df['record_time'].shift(1)\n",
    "df_drop = df[df.delta>datetime.timedelta(minutes=20)]\n",
    "df_drop.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(df_drop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 共114处存在数据缺失，数据共有56526条，缺失部分占比很小，因此可以舍弃缺失处数据\n",
    "# 同时发现在2017年12月份数据缺失最大，决定将2017/12/1之前数据用作训练集，之后数据用作测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "##  数据标准化\n",
    "##  为避免未来函数影响，标准化方式为 （数据-训练集均值）/训练集标准差\n",
    "train_mean = np.mean(df[df.record_time<'2017-12-01']['cpu_usable'])\n",
    "train_std = np.std(df[df.record_time<'2017-12-01']['cpu_usable'])\n",
    "df['cpu_usable'] = (df['cpu_usable']-train_mean)/train_std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 标准化结果展示\n",
    "plt.hist(df['cpu_usable'],100)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 由上图可知，数据分布较符合正态分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 划分输入输出；训练集测试集\n",
    "# 如果输入输出中包含缺失片段，则舍弃；"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 通过前面a个10分钟数据预测未来10分钟\n",
    "a = 144\n",
    "drop_time = np.array(df_drop['record_time'])\n",
    "train_input = []\n",
    "train_output = []\n",
    "test_input = []\n",
    "test_output = []\n",
    "for i in range(a,len(df)-1):\n",
    "    t = np.array(df['record_time'][i-a:i+1])\n",
    "    usable = np.array(df['cpu_usable'][i-a:i+1])\n",
    "    intersection = [j for j in t if j in drop_time]\n",
    "    if len(intersection) != 0:\n",
    "        continue\n",
    "    if df['record_time'][i-a:i+1][i-a] < datetime.datetime.strptime('2017-12-01','%Y-%m-%d'):\n",
    "        train_input.append(usable[0:a])\n",
    "        train_output.append(usable[-1])\n",
    "    if df['record_time'][i-a:i+1][i-a] > datetime.datetime.strptime('2017-12-01','%Y-%m-%d'):\n",
    "        test_input.append(usable[0:a])\n",
    "        test_output.append(usable[-1])\n",
    "train_input = np.array(train_input)\n",
    "train_output = np.array(train_output)\n",
    "test_input = np.array(test_input)\n",
    "test_output = np.array(test_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(len(train_input))\n",
    "print(len(test_input))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 将数据转化为适合输入神经网络的三维数据\n",
    "train_input = np.reshape(train_input,(len(train_input),len(train_input[0]),1))\n",
    "test_input = np.reshape(test_input,(len(test_input),len(test_input[0]),1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### 1层lstm与1层dense"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#model1 : output_shape取值16，32，64; 激活函数 tanh\n",
    "output_shape = 32\n",
    "act = 'tanh'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建神经网络层\n",
    "lstm_input = Input(shape=(len(train_input[0]),1),name='lstm_input')\n",
    "lstm_output = LSTM(output_shape, activation=act)(lstm_input)\n",
    "predictions = Dense(1, bias=True)(lstm_output)\n",
    "model1 = Model(input=lstm_input,output=predictions)\n",
    "model1.compile(optimizer='adam',loss='mse',metrics=['mse'])\n",
    "model1.fit(train_input,train_output,batch_size=512,nb_epoch=10,verbose=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测以及得到结果比对mse\n",
    "predictions=model1.predict(test_input)\n",
    "predictions = predictions.reshape(len(predictions))\n",
    "mean_squared_error(test_output,predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#model2 : output_shape取值16，32，64; 激活函数 2*sigmoid\n",
    "output_shape = 32\n",
    "def sigmoid_2(x):\n",
    "    return 2/(1+tf.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建神经网络层\n",
    "lstm_input = Input(shape=(len(train_input[0]),1),name='lstm_input')\n",
    "lstm_output = LSTM(output_shape, activation=sigmoid_2)(lstm_input)\n",
    "predictions = Dense(1, bias=True)(lstm_output)\n",
    "model2 = Model(input=lstm_input,output=predictions)\n",
    "model2.compile(optimizer='adam',loss='mse',metrics=['mse'])\n",
    "model2.fit(train_input,train_output,batch_size=512,nb_epoch=10,verbose=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测以及得到结果比对mse\n",
    "predictions=model2.predict(test_input)\n",
    "predictions = predictions.reshape(len(predictions))\n",
    "mean_squared_error(test_output,predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### 1层lstm与2层dense"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#model3 : output_shape取值16，32，64; 激活函数 tanh\n",
    "output_shape = 32\n",
    "act = 'tanh'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建神经网络层\n",
    "lstm_input = Input(shape=(len(train_input[0]),1),name='lstm_input')\n",
    "lstm_output = LSTM(output_shape, activation=act)(lstm_input)\n",
    "dense_1 = Dense(8)(lstm_output)\n",
    "predictions = Dense(1, bias=True)(dense_1)\n",
    "model3 = Model(input=lstm_input,output=predictions)\n",
    "model3.compile(optimizer='adam',loss='mse',metrics=['mse'])\n",
    "model3.fit(train_input,train_output,batch_size=512,nb_epoch=10,verbose=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测以及得到结果比对mse\n",
    "predictions=model3.predict(test_input)\n",
    "predictions = predictions.reshape(len(predictions))\n",
    "mean_squared_error(test_output,predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#model4 : output_shape取值16，32，64; 激活函数 2*sigmoid\n",
    "output_shape = 32\n",
    "def sigmoid_2(x):\n",
    "    return 2/(1+tf.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建神经网络层\n",
    "lstm_input = Input(shape=(len(train_input[0]),1),name='lstm_input')\n",
    "lstm_output = LSTM(output_shape, activation=sigmoid_2)(lstm_input)\n",
    "dense_1 = Dense(8)(lstm_output)\n",
    "predictions = Dense(1, bias=True)(dense_1)\n",
    "model4 = Model(input=lstm_input,output=predictions)\n",
    "model4.compile(optimizer='adam',loss='mse',metrics=['mse'])\n",
    "model4.fit(train_input,train_output,batch_size=512,nb_epoch=10,verbose=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测以及得到结果比对mse\n",
    "predictions=model4.predict(test_input)\n",
    "predictions = predictions.reshape(len(predictions))\n",
    "mean_squared_error(test_output,predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测一段时间的容量剩余\n",
    "# point:选取预测集上的第几个输入数据作为待预测项\n",
    "# length:预测未来多少个10分钟\n",
    "point = 100\n",
    "length = 30\n",
    "data_input = list(test_input[point])\n",
    "data_output = test_output[point:point+length]\n",
    "data_predicted = []\n",
    "for i in range(length):\n",
    "    data_prediction = model1.predict(np.array([data_input[-240:]]))\n",
    "    data_input.append(data_prediction[0])\n",
    "    data_predicted.append(data_prediction[0][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 图像展示 预测值 蓝线；实际值 黄线\n",
    "plt.plot(data_predicted)\n",
    "plt.plot(data_output)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
